*********************************************************************************
			*****  Appendix E - Data Preparation *****
*********************************************************************************

*********************************************************************************
***** Set-up
*********************************************************************************

* Start from a clean session: close any log that is still open (errors are
* suppressed by -cap-), clear everything from memory, and reset the timers.
cap log close
clear all
timer clear
* Interpret the rest of the file under Stata 15.1 rules.
version 15.1
set more off
set matsize 10000

* Root directory of the replication package; all other paths are derived from it.
global path "C:\Users\Prof. Wiederhold\Desktop\Projects\RYL - Dokumente\Replication" //enter path where PIAAC/PIAAC-L data are stored

// Folder and paths
* One global per sub-folder (orig, data, prog, log, table, figure, results), each
* holding the path of the same-named directory below the root directory.
* Only the orig and data globals are referenced in this file.
foreach folder in orig data prog log table figure results {
  global `folder' ${path}\\`folder'
}

** PIAAC GERMANY SUF

use "$orig\ZA5845_v2-2-0.dta", clear

// harmonize all variable names to lower-case letters
rename * , lower

*****************

** weight
rename spfwt0 weight_12

** standardized skill scores (updated scores for 2012)
// Every plausible value (1-10) of every skill domain is z-standardized with the
// weighted mean and standard deviation of the loaded sample. The second
// -summarize- only displays the result.
local name_num "Numeracy"
local name_lit "Literacy"
local name_psl "Problem-solving"

forvalues pv = 1/10 {
	foreach dom in num lit psl {
		summarize pv`dom'`pv' [w=weight_12], detail
		generate `dom'`pv'_12 = (pv`dom'`pv' - r(mean)) / r(sd)
		summarize `dom'`pv'_12
		label variable `dom'`pv'_12 "`name_`dom'' score 2012 Plausible Value `pv' (standardized)"
	}
}

** earnings measures
// For each measure: copy the source variable, keep only values between the
// (unweighted) 1st and 99th percentile, and take the log of the trimmed values.
//   earn_hour     from earnhr      hourly earnings
//   earn_hourppp  from earnhrppp   hourly earnings - PPP-USD
//   earnings      from earnmthall  monthly earnings (monthly earnings including bonuses
//                                  for wage and salary earners and self-employed)
local from_earn_hour    earnhr
local from_earn_hourppp earnhrppp
local from_earnings     earnmthall

foreach y in earn_hour earn_hourppp earnings {
	if "`y'" == "earn_hourppp" {
		tab earnhrppp, m
	}
	clonevar `y' = `from_`y''

	generate trim_`y' = .
	quietly summarize `y', detail
	replace trim_`y' = `y' if `y' >= r(p1) & `y' <= r(p99)

	generate log_trim_`y' = log(trim_`y')
}

** gender: 0 if gender_r is 1 (labelled Male), 1 if gender_r is 2 (labelled Female), missing otherwise
generate female = cond(gender_r == 1, 0, cond(gender_r == 2, 1, .))

label define gender 0 "Male" 1 "Female"
label values female gender

** age
rename age_r age

** years of schooling
replace yrsqual = . if yrsqual == 97   // 97 = don't know

** university degree
// 0 for qualification codes below 6 and for code 12; 1 for codes 6 to 10; missing otherwise
generate uni = cond(b_q01ade2 < 6 | b_q01ade2 == 12, 0, cond(b_q01ade2 >= 6 & b_q01ade2 <= 10, 1, .))
tabulate uni

** actual work experience
generate exp = c_q09

** potential work experience: age minus years of schooling minus 6, and its square
generate pot_exp = age - yrsqual - 6
generate pot_exp_squared = pot_exp^2

** migration status
// NOTE(review): the missing-value resets below test for system missing (.) only.
// If the birthplace items carry extended missing codes (.a, .b, ...), those satisfy
// the !=1 comparisons and are not reset to missing -- confirm against the codebook.

// first generation: j_q04a equals 2 and at least one of j_q06a / j_q07a is not 1
generate migrant_first = (j_q04a==2 & (j_q06a!=1 | j_q07a!=1))
replace migrant_first = . if j_q04a==. | j_q06a==. | j_q07a==.
label variable migrant_first "First-gen migrant (participant born abroad; at least one parent also)"

// second generation: j_q04a equals 1 and j_q06a and/or j_q07a equals 2;
// missing only if both parent items are missing or the respondent item is missing
generate migrant_second = ((j_q06a==2 | j_q07a==2) & j_q04a==1)
replace migrant_second = . if (j_q06a==. & j_q07a==.) | j_q04a==.
label variable migrant_second "Second-gen migrant (mother, father, or both born abroad; participant not)"

// either generation; missing only when both component indicators are missing
generate migrant = (migrant_first==1 | migrant_second==1)
replace migrant = . if migrant_first==. & migrant_second==.
label variable migrant "First-gen or second-gen migrant"

** unemployed: 1 if c_d05 is 2, 0 if c_d05 is 1
// note that persons out of the labor force are missing in this specification
// (as well as persons who don't know their status)
generate unemployed = cond(c_d05==2, 1, cond(c_d05==1, 0, .))

** unemployed or out of the labor force
// 0 = employed (c_d05 is 1); 1 = unemployed / out of the labour force (2, 3) and
// "not known" (4), which is counted as nonemployed; missing otherwise
generate nonemployed = cond(c_d05==1, 0, cond(inlist(c_d05, 2, 3, 4), 1, .))

** education parents: 1 if the item equals 3 (ISCED 5 and 6), 0 for every other value including missing
generate uni_mother = (j_q06b==3)
generate uni_father = (j_q07b==3)

** books at home at age of 16
clonevar books = j_q08
replace books = . if missing(books)   // there are some .a missings; collapse them into system missing

** professional qualification
/* Survey question: What is your highest vocational or university degree? */
clonevar professional_qualification = b_q01ade2

** uncompleted qualification
/* Survey question: Have you ever started a school, vocational, or university qualification
and then stopped before obtaining it?
--> Which vocational qualification were you pursuing? If more than one, please name the highest. */
clonevar uncompleted_qualification = b_q03bde2

** currently in education (question on uncompleted qualification was not asked to people who are currently in education)
/*
01 Yes, a general-education school
02 Yes, vocational training or a higher-education institution
03 Yes, both a general-education school and vocational training or a higher-education institution
04 No
*/
generate currently_in_education = cond(inlist(b_q02ade, 1, 2, 3), 1, cond(b_q02ade==4, 0, .))

** ADD SUFFIX _12 TO RELEVANT VARIABLES
// The globals indIND and varI are not defined anywhere in this file; they add
// nothing to the list unless they were set before this file is run.
local skills      pvnum1 pvlit1 pvpsl1
local labor       earn_hour trim_earn_hour log_trim_earn_hour log_trim_earn_hourppp earnings trim_earnings log_trim_earnings unemployed nonemployed
local demog       age female yrsqual edcat7 uni exp pot_exp pot_exp_squared
local migration   migrant_first migrant_second migrant
local background  uni_mother uni_father pared
local schooling   books professional_qualification uncompleted_qualification currently_in_education
local birth       cnt_brth

foreach v of varlist `skills' `labor' `demog' `migration' `background' `schooling' `birth' $indIND $varI {
	rename `v' `v'_12
}

*****************

** KEEP ONLY RELEVANT VARIABLES (seqid, lng_l1, and everything whose name ends in _12)
keep seqid lng_l1 *_12

** order observations by seqid (for merge later on)
sort seqid

saveold "$data\SUF_PIAAC_Replication.dta", replace

*******************************************************************************************************
*******************************************************************************************************

** PIAAC-L, WAVE 1

use "$orig\ZA5989_Persons_14_v3-0-0", clear

rename * , lower	// all var names lower-case letters

*****************

** age
rename age_r_14 age_14

*****************

** gender: 0 if lsex_14 is 1 (labelled Male), 1 if lsex_14 is 2 (labelled Female), missing otherwise
generate female_14 = cond(lsex_14==1, 0, cond(lsex_14==2, 1, .))

label define gender 0 "Male" 1 "Female"
label values female_14 gender

*****************

** migration status
// helper indicators for respondent, father, and mother:
// 1 if the source item is 1, 0 if it is 2, missing for any other value
local src_born_germany        lgebd_14
local src_born_germany_father lv03a_14
local src_born_germany_mother lm03a_14

foreach b in born_germany born_germany_father born_germany_mother {
	generate `b' = cond(`src_`b''==2, 0, cond(`src_`b''==1, 1, .))
}

// first generation: respondent and at least one parent have indicator 0;
// missing as soon as any of the three indicators is missing
generate migrant_first_14 = (born_germany==0 & (born_germany_father==0 | born_germany_mother==0))
replace migrant_first_14 = . if born_germany==. | born_germany_father==. | born_germany_mother==.
label variable migrant_first_14 "First-gen migrant (participant born abroad; at least one parent also)"

// second generation: respondent has indicator 1, at least one parent has indicator 0;
// missing if both parent indicators are missing or the respondent indicator is missing
generate migrant_second_14 = ((born_germany_father==0 | born_germany_mother==0) & born_germany==1)
replace migrant_second_14 = . if (born_germany_father==. & born_germany_mother==.) | born_germany==.
label variable migrant_second_14 "Second-gen migrant (mother, father, or both born abroad; participant not)"

// either generation; missing only when both component indicators are missing
generate migrant_14 = (migrant_first_14==1 | migrant_second_14==1)
replace migrant_14 = . if migrant_first_14==. & migrant_second_14==.
label variable migrant_14 "First-gen or second-gen migrant"


*****************

** PARENTAL EDUCATION

** parental education (mother and father separately)
// The source item (lv06_14 for the father, lm06_14 for the mother) is mapped to
//   0 Don't know/refused           (source code -1)
//   1 No vocational or university  (source code 3)
//   2 Vocational                   (source code 1)
//   3 University                   (source code 2)
// and stays missing for every other source value.
// The uni indicator is 1 for category 3, 0 for categories 0-2, missing otherwise.
local raw_father lv06_14
local raw_mother lm06_14

foreach parent in father mother {
	local raw `raw_`parent''

	generate edu_`parent'_14 = cond(`raw'==-1, 0, cond(`raw'==3, 1, cond(`raw'==1, 2, cond(`raw'==2, 3, .))))

	label define edu_`parent'_14l 0 "Don't know/refused" 1 "No vocational or university" 2 "Vocational" 3 "University", modify
	label values edu_`parent'_14 edu_`parent'_14l

	generate uni_`parent'_14 = cond(edu_`parent'_14==3, 1, cond(inlist(edu_`parent'_14, 0, 1, 2), 0, .))
}


*****

** How many of your first 15 years have you lived with parents?
// Each count is copied from its source item and the source code -5 is set to 0 years.
//   both_parents  (lkh03_14): with both biological parents; -5 is the extra category for those
//                             who don't know their parents (they probably did not spend any time with them)
//   motheralone   (lkh04_14): with mother alone;            -5 is the extra category for "not applicable"
//   motherpartner (lkh05_14): with mother and new partner;  -5 as above
//   fatheralone   (lkh06_14): with father alone;            -5 as above
//   fatherpartner (lkh07_14): with father and new partner;  -5 as above
//   foster        (lkh09_14): with foster parents;          -5 as above
local src_both_parents  lkh03_14
local src_motheralone   lkh04_14
local src_motherpartner lkh05_14
local src_fatheralone   lkh06_14
local src_fatherpartner lkh07_14
local src_foster        lkh09_14

foreach arr in both_parents motheralone motherpartner fatheralone fatherpartner foster {
	clonevar years_with_`arr'_14 = `src_`arr''
	replace years_with_`arr'_14 = 0 if years_with_`arr'_14 == -5
}

* non-single parent
// assumption: you had both parents at the age of 15 if you lived all 15 years with both
// biological parents, with mother and partner, with father and partner, or with foster
// parents (very conservative)
generate nonsingleparent_inclstep_14 = inlist(15, years_with_both_parents_14, years_with_motherpartner_14, ///
	years_with_fatherpartner_14, years_with_foster_14)

tab nonsingleparent_inclstep_14 //83% non-single parents

*****

** non-cognitive skills/Big Five (variables pego01_14-pego16_14)
// Item-to-trait assignment (items marked R must be reversed and enter the scale
// through a flipped copy with suffix r):
//   conscientiousness: pego01_14, pego07_14 (R), pego11_14
//   extraversion:      pego02_14, pego08_14, pego12_14 (R)
//   agreeableness:     pego03_14 (R), pego06_14, pego13_14
//   openness:          pego04_14, pego09_14, pego14_14, pego16_14
//   neuroticism:       pego05_14, pego10_14, pego15_14 (R)
local items_con   pego01_14 pego07_14 pego11_14
local items_extra pego02_14 pego08_14 pego12_14
local items_agree pego03_14 pego06_14 pego13_14
local items_open  pego04_14 pego09_14 pego14_14 pego16_14
local items_neur  pego05_14 pego10_14 pego15_14

* copy the items and set the source code -3 to missing
foreach trait in con extra agree open neur {
	local k = 0
	foreach item of local items_`trait' {
		local ++k
		clonevar bfi_`trait'`k' = `item'
		replace bfi_`trait'`k' = . if bfi_`trait'`k' == -3
	}
}

* recode: flipped copies of the reverse-keyed items (7-point scale)
local flip7 (1=7) (2=6) (3=5) (4=4) (5=3) (6=2) (7=1)
foreach item in bfi_con2 bfi_extra3 bfi_agree1 bfi_neur3 {
	recode `item' `flip7', gen(`item'r)
}

* generate Big-5: row mean of the (partly flipped) items of each trait
local parts_extra bfi_extra1 bfi_extra2 bfi_extra3r
local parts_neur  bfi_neur1 bfi_neur2 bfi_neur3r
local parts_open  bfi_open1 bfi_open2 bfi_open3 bfi_open4
local parts_con   bfi_con1 bfi_con2r bfi_con3
local parts_agree bfi_agree1r bfi_agree2 bfi_agree3

local text_extra "Extraversion"
local text_neur  "Neuroticism"
local text_open  "Openness to experience"
local text_con   "Conscientiousness"
local text_agree "Agreeableness"

foreach trait in extra neur open con agree {
	egen bfi_`trait' = rmean(`parts_`trait'')
	label variable bfi_`trait' "Big-5 `text_`trait''"
}

*****

** grit (variables piq01_14-05_14)
// copy the five items and set the source code -3 to missing
forvalues g = 1/5 {
	clonevar grit`g' = piq0`g'_14
	replace grit`g' = . if grit`g' == -3
}

// item 5 is flipped on its 5-point scale before averaging
recode grit5 (1=5) (2=4) (3=3) (4=2) (5=1)

egen grit = rmean(grit1 grit2 grit3 grit4 grit5)
label variable grit "Grit"
	
*****

** locus of control (variables pzu01_14-pzu10_14)
// Original author note: "don't use pzu02_14 and pzu10_14 to construct the variables".
// NOTE(review): pzu10_14 (inloc4) is indeed left out of the internal scale, but
// pzu02_14 (exloc1) does enter the external scale below -- confirm which is intended.
local flip7 (1=7) (2=6) (3=5) (4=4) (5=3) (6=2) (7=1)

* internal locus of control: copy items, set -3 to missing, flip item 3
local k = 0
foreach item in pzu01_14 pzu05_14 pzu08_14 pzu10_14 {
	local ++k
	clonevar inloc`k' = `item'
	replace inloc`k' = . if inloc`k' == -3
}
recode inloc3 `flip7'

egen inloc = rmean(inloc1 inloc2 inloc3)
label variable inloc "Internal locus of control"

***

* external locus of control: copy items, set -3 to missing, flip every item
local k = 0
foreach item in pzu02_14 pzu03_14 pzu04_14 pzu06_14 pzu07_14 pzu09_14 {
	local ++k
	clonevar exloc`k' = `item'
	replace exloc`k' = . if exloc`k' == -3
	recode exloc`k' `flip7'
}

egen exloc = rmean(exloc1 exloc2 exloc3 exloc4 exloc5 exloc6)
label variable exloc "External locus of control"

*****

** reciprocity and other attitudes (pmas01_14-pmas11_14)

// risk attitude (0: completely unwilling to take risks; 10: completely willing to take risks)
clonevar risk_attitude = prisk_14
replace risk_attitude = . if risk_attitude == -3

// trust: three items with -3 set to missing; item 1 is flipped on its 4-point
// scale, items 2 and 3 enter as recorded
local k = 0
foreach item in pbez01_14 pbez02_14 pbez03_14 {
	local ++k
	clonevar trust`k' = `item'
	replace trust`k' = . if trust`k' == -3
}
recode trust1 (1=4) (2=3) (3=2) (4=1)

egen trust = rmean(trust1 trust2 trust3)
label variable trust "Trust"

*****

** professional qualification
// One indicator per qualification type: 1 if the source item equals 1,
// 0 if it equals -5, missing for any other value. The source item is tabulated
// (without value labels) before each indicator is built.
//   apprenticeship        lab02_14
//   vocational school     lab03_14
//   technical school      lab04_14
//   civil servant training lab05_14
//   technical college     lab06_14
//   university            lab07_14
local item_apprenticeship       lab02_14
local item_vocationalschool     lab03_14
local item_technicalschool      lab04_14
local item_civilservanttraining lab05_14
local item_technicalcollege     lab06_14
local item_university           lab07_14

foreach q in apprenticeship vocationalschool technicalschool civilservanttraining technicalcollege university {
	tab `item_`q'', nol
	generate `q'_14 = cond(`item_`q''==-5, 0, cond(`item_`q''==1, 1, .))
}
	
*****************

** SECONDARY EDUCATION

** school grades
//Note: control for grades using dummies and assign zero to missing values (grade information is missing from 243 partners)
// German, math, and foreign-language grades come from lsch1_14, lsch2_14, lsch3_14;
// the codes -3 and 7 are set to missing
local k = 0
foreach subj in german math foreign {
	local ++k
	clonevar grade_`subj'_14 = lsch`k'_14
	replace grade_`subj'_14 = . if inlist(grade_`subj'_14, -3, 7)   // grade missing/did not have subject
}

** secondary school track
// starts from lsab7_14, then overrides with extra categories (later lines take precedence)
clonevar level_secondary_14 = lsab7_14
replace level_secondary_14 = 0  if lsab3_14 == 3              // foreign qualification
replace level_secondary_14 = 7  if lsab2_14 == 1              // still at school
replace level_secondary_14 = 99 if level_secondary_14 == -5   // missing

label define lsab7_14 0 "foreign qualification" 7 "still at school" 99 "missing", modify
label values level_secondary_14 lsab7_14


*****************

** ADD SUFFIX _14 TO RELEVANT VARIABLES
local scales bfi_extra bfi_neur bfi_open bfi_con bfi_agree grit inloc exloc risk_attitude trust
foreach v of varlist `scales' {
	rename `v' `v'_14
}

*****************

** KEEP ONLY RELEVANT VARIABLES
// identifiers, demographics, personality scales, school information,
// qualification indicators, and the family-structure indicator
keep seqid pnrfestid pnr hnrid ///
	age_14 female_14 migrant_first_14 migrant_second_14 migrant_14 uni_father_14 uni_mother_14 ///
	bfi_extra_14 bfi_neur_14 bfi_open_14 bfi_con_14 bfi_agree_14 grit_14 inloc_14 exloc_14 risk_attitude_14 trust_14 ///
	grade_german_14 grade_math_14 grade_foreign_14 level_secondary_14 ///
	apprenticeship_14 vocationalschool_14 technicalschool_14 civilservanttraining_14 technicalcollege_14 university_14 ///
	nonsingleparent_inclstep_14

** order observations by seqid (for merge with SUF later on)
sort seqid

saveold "$data\PIAAC-L_Wave1_Replication.dta", replace

*******************************************************************************************************
*******************************************************************************************************

** PIAAC-L, WAVE 2

use "$orig\ZA5989_Persons_15_v3-0-0", clear

rename * , lower	// all var names lower-case letters

** age (codes -7, -5, -3 are set to missing)
clonevar age_15 = age_r_15
replace age_15 = . if inlist(age_15, -7, -5, -3)

** migration status
// negative codes of the three birthplace items are set to missing first
replace j_q04a_15 = . if inlist(j_q04a_15, -4, -1)
replace j_q06a_15 = . if inlist(j_q06a_15, -4, -1)
replace j_q07a_15 = . if inlist(j_q07a_15, -4, -2, -1)

// first generation: j_q04a_15 equals 2 and at least one parent item is not 1;
// missing as soon as any of the three items is missing
generate migrant_first_15 = (j_q04a_15==2 & (j_q06a_15!=1 | j_q07a_15!=1))
replace migrant_first_15 = . if j_q04a_15==. | j_q06a_15==. | j_q07a_15==.
label variable migrant_first_15 "First-gen migrant (participant born abroad; at least one parent also)"

// second generation: j_q04a_15 equals 1 and at least one parent item equals 2;
// missing if both parent items are missing or the respondent item is missing
generate migrant_second_15 = ((j_q06a_15==2 | j_q07a_15==2) & j_q04a_15==1)
replace migrant_second_15 = . if (j_q06a_15==. & j_q07a_15==.) | j_q04a_15==.
label variable migrant_second_15 "Second-gen migrant (mother, father, or both born abroad; participant not)"

// either generation; missing only when both component indicators are missing
generate migrant_15 = (migrant_first_15==1 | migrant_second_15==1)
replace migrant_15 = . if migrant_first_15==. & migrant_second_15==.
label variable migrant_15 "First-gen or second-gen migrant"

** books at home at age of 16
clonevar books_15 = j_q08_15
replace books_15 = . if inlist(books_15, -1, -2)   // refused/don't know

** KEEP ONLY RELEVANT VARIABLES
keep seqid pnrfestid pnr hnrid ///
	age_15 migrant_first_15 migrant_second_15 migrant_15 books_15

** order observations by pnrfestid (for merge later on)
sort pnrfestid

saveold "$data\PIAAC-L_Wave2_Replication.dta", replace

*******************************************************************************************************
*******************************************************************************************************

** PIAAC-L, WAVE 3

use "$orig\ZA5989_Persons_16_v3-0-0", clear

rename * , lower	// all var names lower-case letters

*****************

** age (negative codes are set to missing)
rename age_r_16 age_16
replace age_16 = . if age_16 < 0 //4 persons refused to report the age

*****************

** gender: 0 if lsex_16 is 1 (labelled Male), 1 if lsex_16 is 2 (labelled Female), missing otherwise
generate female_16 = cond(lsex_16==1, 0, cond(lsex_16==2, 1, .))

label define gender 0 "Male" 1 "Female"
label values female_16 gender

*****************

** migration (built from the two items migrant1 and migrant2 found in the wave file)
// first generation: migrant1 equals 2 and migrant2 is not 1; missing if either item is missing
generate migrant_first_16 = (migrant1==2 & migrant2!=1)
replace migrant_first_16 = . if migrant1==. | migrant2==.
label variable migrant_first_16 "First-gen migrant (participant born abroad; at least one parent also)"

// second generation: migrant1 equals 1 and migrant2 equals 2 or 3; missing if either item is missing
generate migrant_second_16 = (inlist(migrant2, 2, 3) & migrant1==1)
replace migrant_second_16 = . if migrant2==. | migrant1==.
label variable migrant_second_16 "Second-gen migrant (mother, father, or both born abroad; participant not)"

// either generation; missing only when both component indicators are missing
generate migrant_16 = (migrant_first_16==1 | migrant_second_16==1)
replace migrant_16 = . if migrant_first_16==. & migrant_second_16==.
label variable migrant_16 "First-gen or second-gen migrant"

*****************

** parental education
// The source item (lv06 for the father, lm06 for the mother) is mapped to
//   0 Don't know/refused           (source codes -4, -2, -1)
//   1 No vocational or university  (source code 3)
//   2 Vocational                   (source code 1)
//   3 University                   (source code 2)
// and stays missing for every other source value.
// The uni indicator is 1 for category 3, 0 for categories 0-2, missing otherwise.
//
// Both parents now go through the same code path. This removes a duplicated
// assignment of source code -1 for the mother and gives category 0 the same
// value-label text for both parents ("Don't know/refused"), since the category
// collects the same source codes in both cases. Variable values are unchanged.
local raw_father lv06
local raw_mother lm06

foreach parent in father mother {
	local raw `raw_`parent''

	gen edu_`parent'_16=0 if inlist(`raw', -4, -2, -1)
	replace edu_`parent'_16=1 if `raw'==3
	replace edu_`parent'_16=2 if `raw'==1
	replace edu_`parent'_16=3 if `raw'==2

	label define edu_`parent'_16l 0 "Don't know/refused" 1 "No vocational or university" 2 "Vocational" 3 "University", modify
	label values edu_`parent'_16 edu_`parent'_16l

	gen uni_`parent'_16=.
	replace uni_`parent'_16=0 if inlist(edu_`parent'_16, 0, 1, 2)
	replace uni_`parent'_16=1 if edu_`parent'_16==3
}

*****************

** NON-COGNITIVE SKILLS

//Big Five (variables pego01-pego16)
// Item-to-trait assignment (items marked R must be reversed and enter the scale
// through a flipped copy with suffix r):
//   conscientiousness: pego01, pego07 (R), pego11
//   extraversion:      pego02, pego08, pego12 (R)
//   agreeableness:     pego03 (R), pego06, pego13
//   openness:          pego04, pego09, pego14, pego16
//   neuroticism:       pego05, pego10, pego15 (R)
local items_con   pego01 pego07 pego11
local items_extra pego02 pego08 pego12
local items_agree pego03 pego06 pego13
local items_open  pego04 pego09 pego14 pego16
local items_neur  pego05 pego10 pego15

* copy the items and set the source codes -2 and -1 to missing
foreach trait in con extra agree open neur {
	local k = 0
	foreach item of local items_`trait' {
		local ++k
		clonevar bfi_`trait'`k' = `item'
		replace bfi_`trait'`k' = . if inlist(bfi_`trait'`k', -2, -1)
	}
}

* recode: flipped copies of the reverse-keyed items (7-point scale)
local flip7 (1=7) (2=6) (3=5) (4=4) (5=3) (6=2) (7=1)
foreach item in bfi_con2 bfi_extra3 bfi_agree1 bfi_neur3 {
	recode `item' `flip7', gen(`item'r)
}

* generate Big-5: row mean of the (partly flipped) items of each trait
local parts_extra bfi_extra1 bfi_extra2 bfi_extra3r
local parts_neur  bfi_neur1 bfi_neur2 bfi_neur3r
local parts_open  bfi_open1 bfi_open2 bfi_open3 bfi_open4
local parts_con   bfi_con1 bfi_con2r bfi_con3
local parts_agree bfi_agree1r bfi_agree2 bfi_agree3

local text_extra "Extraversion"
local text_neur  "Neuroticism"
local text_open  "Openness to experience"
local text_con   "Conscientiousness"
local text_agree "Agreeableness"

foreach trait in extra neur open con agree {
	egen bfi_`trait' = rmean(`parts_`trait'')
	label variable bfi_`trait' "Big-5 `text_`trait''"
}

* mark the scales as wave-2016 measures
foreach trait in extra neur open con agree {
	rename bfi_`trait' bfi_`trait'_16
}

*****

//locus of control (variables pzu01-pzu10)
// Original author note: "don't use pzu02 and pzu10 to construct the variables".
// NOTE(review): pzu10 (inloc4) is indeed left out of the internal scale, but
// pzu02 (exloc1) does enter the external scale below -- confirm which is intended.
local flip7 (1=7) (2=6) (3=5) (4=4) (5=3) (6=2) (7=1)

* internal locus of control: copy items, set -2 and -1 to missing, flip item 3
local k = 0
foreach item in pzu01 pzu05 pzu08 pzu10 {
	local ++k
	clonevar inloc`k' = `item'
	replace inloc`k' = . if inloc`k'==-2 | inloc`k'==-1
}
recode inloc3 `flip7'

egen inloc = rmean(inloc1 inloc2 inloc3)
label variable inloc "Internal locus of control"

rename inloc inloc_16

*****

* external locus of control: copy items, set -2 and -1 to missing, flip every item
local k = 0
foreach item in pzu02 pzu03 pzu04 pzu06 pzu07 pzu09 {
	local ++k
	clonevar exloc`k' = `item'
	replace exloc`k' = . if exloc`k'==-2 | exloc`k'==-1
	recode exloc`k' `flip7'
}

egen exloc = rmean(exloc1 exloc2 exloc3 exloc4 exloc5 exloc6)
label variable exloc "External locus of control"

rename exloc exloc_16

*****

** secondary school track
// starts from lsab7_16, then overrides with extra categories (later lines take precedence)
clonevar level_secondary_16 = lsab7_16
replace level_secondary_16 = 0  if lsab3_16 == 3                      // foreign qualification
replace level_secondary_16 = 7  if lsab2_16 == 1                      // still at school
replace level_secondary_16 = 99 if inlist(level_secondary_16, -4, -2)  // missing

label define lsab7_16 0 "foreign qualification" 7 "still at school" 99 "missing", modify
label values level_secondary_16 lsab7_16

*****************

** KEEP ONLY RELEVANT VARIABLES
keep seqid pnrfestid pnr hnrid ///
	age_16 female_16 migrant_first_16 migrant_second_16 migrant_16 ///
	bfi_extra_16 bfi_neur_16 bfi_open_16 bfi_con_16 bfi_agree_16 inloc_16 exloc_16 ///
	level_secondary_16 ///
	uni_father_16 uni_mother_16

** order observations by pnrfestid (for merge later on)
sort pnrfestid

saveold "$data\PIAAC-L_Wave3_Replication.dta", replace

*******************************************************************************************************
*******************************************************************************************************
*******************************************************************************************************
*******************************************************************************************************

** MERGE DATA
* Combines the four prepared files into one dataset: the 2012 SUF is the master,
* wave 1 is matched on seqid, waves 2 and 3 are matched on pnrfestid.
* All three merges use the old-style syntax (merge keyvar using file), which relies
* on master and using data being sorted by the key; each file was sorted on its key
* before it was saved, and the master is re-sorted before every merge below.
* NOTE(review): this syntax does not verify that the key is unique in either
* file. Before switching to merge 1:1 or m:1, confirm that seqid and pnrfestid
* are unique and non-missing where expected.

use "$data\SUF_PIAAC_Replication.dta", clear

** merge with PIAAC-L wave 1 
merge seqid using "$data\PIAAC-L_Wave1_Replication.dta"  
tab _merge			// 1=master only; 2=using only; 3=match
drop _merge

* pnrfestid only enters the data with the wave 1 file, so the sort on it happens here
sort pnrfestid

** merge with PIAAC-L wave 2
merge pnrfestid using "$data\PIAAC-L_Wave2_Replication.dta"  
tab _merge		// 1=master; 2=using; 3=match
drop _merge

sort pnrfestid

** merge with PIAAC-L wave 3
merge pnrfestid using "$data\PIAAC-L_Wave3_Replication.dta"  
tab _merge		// 1=master; 2=using; 3=match
drop _merge

* identifiers first, observations ordered by pnrfestid
order seqid pnrfestid pnr hnrid	 	
sort pnrfestid

** anchor
* Flags observations that carry a non-missing 2012 age, i.e. persons found in the 2012 SUF.

gen anchor=0
replace anchor=1 if !missing(age_12) //all participants in PIAAC 2012 have age information
tab anchor

*******************************************************************************************************

** COMBINE VARIABLES ACROSS WAVES
// Each combined variable starts as a copy of the 2012 value. A later wave fills in
// only where the 2012 value and all earlier fallback waves are system missing.
// Fallback waves per variable:
//   age:            2014, 2015, 2016
//   female:         2014, 2016
//   migrant_first:  2014, 2015
//   migrant_second: 2014, 2015
//   migrant:        2014, 2015, 2016
// NOTE(review): migrant falls back on wave 2016 but migrant_first and
// migrant_second do not, although their 2016 versions exist (they are dropped
// below). Confirm whether that asymmetry is intended.
local waves_age            14 15 16
local waves_female         14 16
local waves_migrant_first  14 15
local waves_migrant_second 14 15
local waves_migrant        14 15 16

foreach x in age female migrant_first migrant_second migrant {
	clonevar `x' = `x'_12
	local gap "`x'_12==."
	foreach w of local waves_`x' {
		replace `x' = `x'_`w' if `gap'
		local gap "`gap'&`x'_`w'==."
	}
}

label variable migrant_first "First-gen migrant"

// remove the wave-specific versions that are no longer needed (age_12 is kept)
drop age_14 age_15 age_16
drop female_12 female_14 female_16
drop migrant_first_12 migrant_first_14 migrant_first_15 migrant_first_16
drop migrant_second_12 migrant_second_14 migrant_second_15 migrant_second_16
drop migrant_12 migrant_14 migrant_15 migrant_16

** books at home at age 16: 2012 value, filled from wave 2015 where 2012 is missing
clonevar books = books_12
replace books = books_15 if books_12==.
label variable books "Number of books at home (at age 16)"

** non-cognitive skills: 2014 value, filled from wave 2016 where 2014 is missing
//note that we only have 2014 values for grit, risk aversion, and trust
local lab_bfi_extra "Big-5 Extraversion"
local lab_bfi_neur  "Big-5 Neuroticism"
local lab_bfi_open  "Big-5 Openness to experience"
local lab_bfi_con   "Big-5 Conscientiousness"
local lab_bfi_agree "Big-5 Agreeableness"
local lab_inloc     "Internal locus of control"
local lab_exloc     "External locus of control"

foreach s of newlist bfi_extra bfi_neur bfi_open bfi_con bfi_agree inloc exloc {
	generate `s' = `s'_14
	replace `s' = `s'_16 if `s'_14==.
	drop `s'_16
	label variable `s' "`lab_`s''"
}
//note that we only have 2014 values for grit, risk aversion, and trust

** education of parents
// 2012 value first, then wave 2014, then wave 2016; anything still missing is set to 0
foreach parent in father mother {
	clonevar uni_`parent' = uni_`parent'_12
	foreach w in 14 16 {
		replace uni_`parent' = uni_`parent'_`w' if missing(uni_`parent')
	}
	replace uni_`parent' = 0 if missing(uni_`parent')   // Don't know/refused/missing
}

drop uni_father_14 uni_father_16 uni_mother_14 uni_mother_16

** secondary school track: 2014 value, filled from wave 2016 where 2014 is missing
clonevar level_secondary = level_secondary_14
replace level_secondary = level_secondary_16 if level_secondary_14==.
		
*******************************************************************************************************

** IMPUTE MISSINGS OF RELEVANT CONTROL VARIABLES

* migrant and age (both of which we use as factor variables): missings get the code 99
// one value label (named migrant) that maps 99 to "missing" is attached to both variables
label define migrant 99 "missing", replace

foreach v in migrant age {
	replace `v' = 99 if `v'==.
	label values `v' migrant
}

* generate dummy for missings and assign zero to missings
// For each school grade: create an indicator (suffix _miss) that is 1 where the
// grade is missing, then set the missing grade itself to 0.
//
// The loop must read -of varlist-. With -in varlist- the word "varlist" is taken as
// the first list element: the loop then either stops with an error or, when
// variable-name abbreviation is on, silently leaves a junk variable varlist_miss
// in the final dataset.
foreach var of varlist grade_german_14 grade_math_14 grade_foreign_14 {
	gen `var'_miss=0
	replace `var'_miss=1 if `var'==.
	replace `var'=0 if `var'==.
}

// add the new category 0 to each grade value label and (re)attach the label
label define lsch1_14 0 "missing/did not have subject", modify
label values grade_german_14 lsch1_14

label define lsch2_14 0 "missing/did not have subject", modify
label values grade_math_14 lsch2_14

label define lsch3_14 0 "missing/did not have subject", modify
label values grade_foreign_14 lsch3_14

* generate dummy for missings and assign sample mean to missings
// For each 2014 scale: flag missing values (suffix _miss), then replace them by the
// unweighted mean of the non-missing observations. The second -summarize- only
// displays the variable after imputation.
local traits bfi_extra_14 bfi_neur_14 bfi_open_14 bfi_con_14 bfi_agree_14 grit_14 inloc_14 exloc_14 risk_attitude_14 trust_14

foreach v of varlist `traits' {
	generate `v'_miss = (`v'==.)
	summarize `v'
	replace `v' = r(mean) if `v'==.
	summarize `v'
}

*******************************************************************************************************

** FURTHER VARIABLE GENERATION/LABELING

// squared age, scaled down by 100
// NOTE(review): missing ages were set to the code 99 earlier in this file, so
// age_sq equals 98.01 for those observations -- confirm this is handled downstream.
generate age_sq = (age*age)/100

label variable age    "Age"
label variable age_sq "Age^2(/100)"
label variable female "Female"


*******************************************************************************************************

** SAVE VARIABLES

saveold "$data\AppendixE_PIAAC.dta", replace

